{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\AI\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np  # 矩阵操作\n",
    "import pandas as pd # SQL数据处理\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt   #画图\n",
    "from sklearn.metrics import r2_score  #评价回归预测模型的性能\n",
    "from sklearn.model_selection import train_test_split # 数据分割\n",
    "# 数据标准化\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "# 线性回归\n",
    "from sklearn.linear_model import LinearRegression\n",
    "# 线性模型，随机梯度下降优化模型参数\n",
    "from sklearn.linear_model import SGDRegressor\n",
    "#岭回归 --> L2正则\n",
    "from sklearn.linear_model import  RidgeCV\n",
    "# Lasso --> L1正则\n",
    "from sklearn.linear_model import LassoCV\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.cross_validation import cross_val_score\n",
    "\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn import metrics\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "import time\n",
    "#import cPickle\n",
    "import pickle\n",
    "\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n",
    "\n",
    "# 图形出现在Notebook里而不是新窗口\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locations of the input data files used by the rest of the notebook.\n",
    "data_path = 'G:\\\\study\\\\AI\\\\git\\\\machine_learning\\\\week4\\\\'\n",
    "data_train_filename = data_path + 'train.csv'\n",
    "data_test_filename = data_path + 'test.csv'\n",
    "data_full_data_filename = data_path + 'events.csv'\n",
    "\n",
    "# Fail fast when a required input is missing. One loop replaces three\n",
    "# copy-pasted checks, so the reported name is always the file that was\n",
    "# actually tested, and the % operator interpolates it into the message.\n",
    "for required_file in (data_train_filename,\n",
    "                      data_test_filename,\n",
    "                      data_full_data_filename):\n",
    "    if not os.path.exists(required_file):\n",
    "        print('[-] file(%s) is not found!' % required_file)\n",
    "        sys.exit(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Collect every event that appears in train.csv/test.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of events in train & test :13418\n"
     ]
    }
   ],
   "source": [
    "# Collect the distinct event IDs referenced by train.csv and test.csv.\n",
    "uniqueEvents = set()\n",
    "\n",
    "for filename in [data_train_filename, data_test_filename]:\n",
    "    # `with` closes the handle even if a row fails to decode or parse.\n",
    "    with open(filename, 'rb') as f:\n",
    "        # Skip the header row (column names).\n",
    "        f.readline()\n",
    "\n",
    "        for line in f:\n",
    "            cols = line.decode().strip().split(\",\")\n",
    "            # The event ID is read from the second column; rows that do\n",
    "            # not have one (e.g. blank lines) are skipped instead of\n",
    "            # raising IndexError.\n",
    "            if len(cols) > 1:\n",
    "                uniqueEvents.add(cols[1])\n",
    "\n",
    "# Map each event ID to a dense row index. Sorting makes the assignment\n",
    "# reproducible: plain set iteration order for strings can differ between\n",
    "# kernel sessions because of hash randomisation.\n",
    "eventIndex = {e: i for i, e in enumerate(sorted(uniqueEvents))}\n",
    "\n",
    "n_events = len(eventIndex)\n",
    "print(\"number of events in train & test :%d\" % n_events)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 数据太多，只对出现在train.csv/test.csv中的数据进行聚集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  (10647, 0)\t0.10411584125907071\n",
      "  (6041, 0)\t0.10411584125907071\n",
      "  (870, 0)\t0.052057920629535355\n",
      "  (12152, 0)\t0.052057920629535355\n",
      "  (365, 0)\t0.052057920629535355\n",
      "  (10884, 0)\t0.052057920629535355\n",
      "  (2926, 0)\t0.052057920629535355\n",
      "  (3497, 0)\t0.052057920629535355\n",
      "  (11349, 0)\t0.10411584125907071\n",
      "  (9187, 0)\t0.052057920629535355\n",
      "  (6273, 0)\t0.052057920629535355\n",
      "  (857, 0)\t0.052057920629535355\n",
      "  (6431, 0)\t0.052057920629535355\n",
      "  (5205, 0)\t0.15617376188860607\n",
      "  (315, 0)\t0.052057920629535355\n",
      "  (8470, 0)\t0.052057920629535355\n",
      "  (11055, 0)\t0.10411584125907071\n",
      "  (6125, 0)\t0.10411584125907071\n",
      "  (5911, 0)\t0.10411584125907071\n",
      "  (11467, 0)\t0.10411584125907071\n",
      "  (266, 0)\t0.10411584125907071\n",
      "  (1370, 0)\t0.052057920629535355\n",
      "  (2067, 0)\t0.052057920629535355\n",
      "  (8382, 0)\t0.2602896031476768\n",
      "  (2271, 0)\t0.052057920629535355\n",
      "  :\t:\n",
      "  (11823, 100)\t0.03527666062976196\n",
      "  (1008, 100)\t0.01763833031488098\n",
      "  (6152, 100)\t0.07055332125952392\n",
      "  (5242, 100)\t0.014110664251904784\n",
      "  (5825, 100)\t0.07055332125952392\n",
      "  (3549, 100)\t0.00881916515744049\n",
      "  (12209, 100)\t0.0934831506688692\n",
      "  (9063, 100)\t0.014110664251904784\n",
      "  (5858, 100)\t0.29103245019553614\n",
      "  (8399, 100)\t0.010582998188928588\n",
      "  (10460, 100)\t0.00881916515744049\n",
      "  (7753, 100)\t0.06173415610208343\n",
      "  (10601, 100)\t0.037040493661250055\n",
      "  (7027, 100)\t0.015874497283392883\n",
      "  (464, 100)\t0.015874497283392883\n",
      "  (10781, 100)\t0.05820649003910723\n",
      "  (12722, 100)\t0.01940216334636908\n",
      "  (6025, 100)\t0.10053848279482158\n",
      "  (8688, 100)\t0.10406614885779777\n",
      "  (1079, 100)\t0.010582998188928588\n",
      "  (2941, 100)\t0.1869663013377384\n",
      "  (9950, 100)\t0.00881916515744049\n",
      "  (11923, 100)\t0.01940216334636908\n",
      "  (3292, 100)\t0.014110664251904784\n",
      "  (9408, 100)\t0.03880432669273816\n"
     ]
    }
   ],
   "source": [
    "# Build the event-by-count feature matrix from events.csv, restricted to\n",
    "# the events that have a row in eventIndex.\n",
    "FIRST_COUNT_COL = 9    # first CSV column copied into the matrix\n",
    "LAST_COUNT_COL = 110   # exclusive upper bound of the copied columns\n",
    "n_count_features = LAST_COUNT_COL - FIRST_COUNT_COL  # 101 feature columns\n",
    "\n",
    "# Word-frequency features, one row per indexed event.\n",
    "eventContMatrix = ss.dok_matrix((n_events, n_count_features))\n",
    "\n",
    "# Only the first max_search_line data rows are scanned, which keeps the\n",
    "# run cheap on a low-spec machine.\n",
    "max_search_line = 100\n",
    "\n",
    "with open(data_full_data_filename, 'rb') as fin:\n",
    "    # Skip the header row (column names).\n",
    "    fin.readline()\n",
    "\n",
    "    # Iterate the handle lazily: readlines() would load the whole file into\n",
    "    # memory even though only max_search_line rows are ever used.\n",
    "    for line_no, line in enumerate(fin):\n",
    "        if line_no >= max_search_line:\n",
    "            break\n",
    "        cols = line.decode().strip().split(\",\")\n",
    "        eventId = str(cols[0])\n",
    "\n",
    "        if eventId in eventIndex:\n",
    "            i = eventIndex[eventId]\n",
    "\n",
    "            # Copy the count columns into the event's row.\n",
    "            for j in range(FIRST_COUNT_COL, LAST_COUNT_COL):\n",
    "                # CSV fields are text; convert explicitly so the matrix\n",
    "                # stores numbers instead of relying on implicit coercion.\n",
    "                eventContMatrix[i, j - FIRST_COUNT_COL] = float(cols[j])\n",
    "\n",
    "# L2-normalise along axis 0 (per feature column). These word-frequency\n",
    "# features are the input for clustering events into genres.\n",
    "eventContMatrix = normalize(eventContMatrix, norm=\"l2\", axis=0, copy=False)\n",
    "\n",
    "print(eventContMatrix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 直接使用kmeans进行测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def KMeans_detect(K, X, random_state=None):\n",
    "    \"\"\"Cluster X into K groups with MiniBatchKMeans and score the result.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    K : int\n",
    "        Number of clusters to fit.\n",
    "    X : array-like or sparse matrix\n",
    "        Samples to cluster; passed unchanged to MiniBatchKMeans and to\n",
    "        metrics.silhouette_score.\n",
    "    random_state : int or None, optional\n",
    "        Seed forwarded to MiniBatchKMeans. The default (None) keeps the\n",
    "        unseeded behaviour; pass an int for repeatable runs.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Silhouette score of the predicted labels on X. Callers store it\n",
    "        under a CH_ name, but the metric computed here is the silhouette\n",
    "        score.\n",
    "    \"\"\"\n",
    "    start = time.time()\n",
    "    # Log the parameter K rather than a global loop variable, so the function\n",
    "    # also works when called outside a `for k in ...` loop.\n",
    "    print(\"K-means begin with clusters: {}\".format(K))\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters=K, random_state=random_state)\n",
    "    mb_kmeans.fit(X)\n",
    "    score = metrics.silhouette_score(X, mb_kmeans.predict(X))\n",
    "    end = time.time()\n",
    "    # The label names the metric that is actually computed.\n",
    "    print(\"silhouette_score: {}, time elaps: {}\".format(score, int(end - start)))\n",
    "    return score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.9893552515584246, time elaps: 6\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 0.98950175332577, time elaps: 6\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 0.9893552515584246, time elaps: 5\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.9893552515584246, time elaps: 5\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 0.9895758906615512, time elaps: 5\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 0.989629480290549, time elaps: 5\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 0.9899462923862995, time elaps: 6\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.9916070755763778, time elaps: 6\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 0.9905594956674865, time elaps: 7\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 0.9897721781785876, time elaps: 7\n"
     ]
    }
   ],
   "source": [
    "# `with` actually closes the file; a bare `fp.close` without parentheses\n",
    "# is a no-op attribute access.\n",
    "# NOTE(review): `ret` is not read by any cell in this notebook - confirm\n",
    "# whether this load is still needed. pickle.load can execute arbitrary code,\n",
    "# so only open pickle files that come from a trusted source.\n",
    "with open('PE_uniqueEventPairs.pkl', 'rb') as fp:\n",
    "    ret = pickle.load(fp)\n",
    "\n",
    "# Candidate cluster counts to evaluate with k-means.\n",
    "Ks = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]\n",
    "\n",
    "# One score per entry of Ks, in the same order.\n",
    "CH_scores = []\n",
    "for k in Ks:\n",
    "    CH_scores.append(KMeans_detect(k, eventContMatrix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.9893552515584246, 0.98950175332577, 0.9893552515584246, 0.9893552515584246, 0.9895758906615512, 0.989629480290549, 0.9899462923862995, 0.9916070755763778, 0.9905594956674865, 0.9897721781785876]\n"
     ]
    }
   ],
   "source": [
    "# Show the raw score collected for each K, in the same order as Ks.\n",
    "print(CH_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 根据不同K，进行聚类绘图，看起来最高点在80的样子得到最好解"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x1ce8291c9b0>]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAD8CAYAAABpcuN4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmUFOW9//H3l2VQNlG2KKgs4jImCDiKosH9CjGCuOCGg6jxniRGf24RNHpPUOJyzWrMTdwIYlSUJG4xUUNQ6QYNgyyKyCJRQFwGI7ixOMz398dTHZpxnOlherq6pz+vc/p0d3V11beapj9Tz1P1lLk7IiIiX6VF3AWIiEh+U1CIiEidFBQiIlInBYWIiNRJQSEiInVSUIiISJ0UFCIiUicFhYiI1ElBISIidWoVdwHZ0KVLF+/Vq1fcZYiIFJR58+atc/eu9c3XLIKiV69eVFRUxF2GiEhBMbO3M5lPTU8iIlInBYWIiNRJQSEiInVSUIiISJ0UFCIiUicFhYiI1ElBISIidVJQiEjBeOMNePrpuKsoPgoKESkYEybAqFHw0UdxV1JcFBQiUhDcIZGALVvgkUfirqa4KChEpCAsWwbr1oXH998fby3FRkEhIgUhmQz348bB7NmwfHm89RQTBYWIFIREAjp3hhtvhBYtYOrUuCsqHgoKESkIySQMGQI9esDxx4egqK6Ou6rikFFQmNkwM1tqZivMbHwtr+9tZjPMbJGZPW9mPdNeu9XMXotuZ6ZNvyRanptZl7TpR5vZBjNbEN1uaOxGikhhq6wMfRRHHBGel5fDW2+FvQxpevUGhZm1BO4EhgOlwNlmVlpjttuB+929PzARuDl670nAIGAAMBi42sw6Ru9JAscDtY2HPsvdB0S3iQ3fLBFpTlL9E0ceGe5POQXat1endq5kskdxKLDC3Ve6+xbgYWBkjXlKgRnR45lpr5cCL7h7lbt/BiwEhgG4+3x3f6uR9YtIEUgmoaQEDj44PG/XDk4/PRwmu3FjvLUVg0yCogewOu35mmhauoXAadHjUUAHM+scTR9uZm2j5qVjgD0zWOfhZrbQzP5qZgdmML+INGOJBBxyCOy007ZpY8fCJ5/A44/HV1exyCQorJZpXuP5VcBRZjYfOAp4B6hy92eBp4HZwEPAHKCqnvW9Auzt7gcBdwCP1VqU2cVmVmFmFZWVlRlshogUoo0bYd68bf0TKUOHwl57wZQp8dRVTDIJijVsvxfQE1ibPoO7r3X3U919IHBdNG1DdD8p6ms4gRA6dR797O4fu/un0eOngdbpnd1p893l7mXuXta1a73XBheRAlVRAV988eWgaNECzjsPnn0W3n03ntqKRSZBMRfoZ2a9zawEOAt4In0GM+tiZqllTQDui6a3jJqgMLP+QH/g2bpWZmZfMzOLHh8a1fhh5pskIs1J6simIUO+/Np554VDZB98MLc1FZt6g8Ldq4BLgGeAJcAj7r7YzCaa2YhotqOBpWa2DOgOTIqmtwZmmdnrwF3AmGh5mNmlZraGsIeyyMzuid5zOvCamS0EfgWc5e41m7pEpEgkk7D//tDlS+0KsN9+MHiwjn5qatYcfoPLysq8oqIi7jJEJMuqq0NAnHoq3HNP7fP85jfw/e/DggVw0EG5ra/Qmdk8dy+rbz6dmS0ieWvJkjCkeOr8idqceSa0bq29iqakoBCRvJU60a5mR3a6zp3h5JPhD3+AqvqOqZQdoqAQkbyVSEC3brDPPnXPV14O778fjoCS7FNQiEjeSibD3oTVdjZXmuHDw56Fmp+ahoJCRPLSu+/CypV1NzullJTA2WfDY4/B+vVNX1uxUVCISF6qORBgfcrLYfNmmD696WoqVgoKEclLyWQY22ngwMzmLysL51uo+Sn7FBQikpcSCTj00NCslAmzsFcxa1ZospLsUVCISN757DOYPz/zZqeUMWNCYDzwQNPU
VawUFCKSd/75T9i6NbOO7HR77gnHHhuan5rBoBN5Q0EhInknkQh7Bocf3vD3lpfDm2/C7NnZr6tYKShEJO8kk3DggbDrrg1/76mnQtu26tTOJgWFiOSVrVthzpyGNzultG8Pp50G06bBpk3Zra1YKShEJK+89hp8/HHDO7LTlZfDhg3w5JPZq6uYKShEJK9kMhBgfY45Bnr0UPNTtigoRCSvJBKwxx7Qq9eOL6Nly3Co7F//Ch98kLXSipaCQkTySqYDAdanvDz0dzz0UHbqKmYKChHJG6tXw6pVjWt2SiktDcN6TJnS+GUVOwWFiOSNhg4EWJ/y8nCG96uvZmd5xUpBISJ5I5mEdu2yd+3rs86CVq1g6tTsLK9YKShEJG8kEjB4cPhxz4auXeFb3wpjP23dmp1lFiMFhYjkhU8+gUWLstfslFJeHi6CNGNGdpdbTBQUIpIXXnoJqquz05Gd7tvfDkOB6JyKHaegEJG8kEhAixZw2GHZXW6bNqGv4k9/Cnst0nAKChHJC8kk9O8PHTtmf9nl5bBxoy6TuqMUFCISu6qq0PSU7WanlMGDoV8/NT/tKAWFiMRu4cJwVbtsd2SnpC6T+vzz8PbbTbOO5kxBISKxy8ZAgPUZMybc6zKpDaegEJHYJRKw117hUqZNpVcvOOooXSZ1RygoRCRW7tsGAmxq5eWwbFm4JrdkTkEhIrF6+21YuzY3QXH66bDzzurUbigFhYjEKpEI903VkZ2uY0cYNSoMPb55c9Ovr7lQUIhIrJLJ8AP+9a/nZn3l5fDRR/CXv+Rmfc2BgkJEYpVIhLOxW7bMzfqOOw52313NTw2hoBCR2KxfD4sX56bZKaVVKzj33LBHsW5d7tZbyDIKCjMbZmZLzWyFmY2v5fW9zWyGmS0ys+fNrGfaa7ea2WvR7cy06ZdEy3Mz65I23czsV9Fri8xsUGM3UkTy05w54ainXHRkpysvD2eDP/xwbtdbqOoNCjNrCdwJDAdKgbPNrLTGbLcD97t7f2AicHP03pOAQcAAYDBwtZmlRnJJAscDNc+THA70i24XA//X8M0SkUKQSIQmp8GDc7veb3wDBgxQ81OmMtmjOBRY4e4r3X0L8DAwssY8pUBqtPeZaa+XAi+4e5W7fwYsBIYBuPt8d3+rlvWNJISOu/tLQCcz270hGyUihSGZhIEDw1Xtcm3sWJg7F5Ysyf26C00mQdEDWJ32fE00Ld1C4LTo8Sigg5l1jqYPN7O2UfPSMUB9515msj4RKXBbtoQT33Ld7JRy9tlhb0Z7FfXLJCislmk1T4C/CjjKzOYDRwHvAFXu/izwNDAbeAiYA1RlYX2Y2cVmVmFmFZWVlfUsUkTyzfz5YejvXHZkp+veHYYN02VSM5FJUKxh+72AnsDa9Bncfa27n+ruA4HromkbovtJ7j7A3U8ghMDyxq4vWu5d7l7m7mVdu3bNYDNEJJ/kYiDA+pSXw5o1YVRZ+WqZBMVcoJ+Z9TazEuAs4In0Gcysi5mlljUBuC+a3jJqgsLM+gP9gWfrWd8TQHl09NNhwAZ3fzfjLRKRgpBIQO/e4ZyGuJx8Muyyi5qf6lNvULh7FXAJ8AywBHjE3Reb2UQzGxHNdjSw1MyWAd2BSdH01sAsM3sduAsYEy0PM7vUzNYQ9hgWmdk90XueBlYCK4C7ge81fjNFJJ+kBgKMq9kpZeedYfRo+OMf4dNP460ln5k3g/F2y8rKvKKiIu4yRCRDK1aEK8799rfw3/8dby2JBHzzm2Gv4rzz4q0l18xsnruX1TefzswWkZzL5UCA9TniCOjTR81PdVFQiEjOJZPQqRMccEDclWy7TOqMGbB6df3zFyMFhYjkXCIBQ4ZAizz5BTrvvNBv8oc/xF1JfsqTfyYRKRYffghvvJEfzU4pffqEenSZ1NopKEQkp2bPDvdxnj9Rm/LyMJzHvHlxV5J/FBQiklOJBLRuDYccEncl2zvjDGjTRp3atVFQiEhOJZNw8MHh
HIZ80qkTjBwZLpO6ZUvc1eQXBYWI5MymTWHE1nxrdkopLw8XM/rb3+KuJL8oKEQkZ+bNC3+t51NHdroTT4Ru3WDKlLgryS8KChHJmdRAgEOGxFvHV0ldJvXJJ+Hf/467mvyhoBCRnEkkwtAd3brFXclXKy+HL76AadPiriR/KChEJCfcw6Gx+drslHLQQeFSqTr6aRsFhYjkxNKl4WS7fO3ITkkN6fHSS7BsWdzV5AcFhYjkRD4NBFifc84Jw4tMnRp3JflBQSEiOZFMQpcusO++cVdSvz32gBNOCEFRXR13NfFTUIhITqQGAjSLu5LMjB0Lb78NL74YdyXxU1CISJN7//1wsaJCaHZKGTkSOnRQpzYoKEQkB/J1IMC6tG0bxn969FH4/PO4q4mXgkJEmlwiEQbcO/jguCtpmPLycC3txx6Lu5J4KShEpMklk2G02DZt4q6kYb75Tdh7bzU/KShEpEl9/jm88kphNTultGgRrn733HOwdm3c1cRHQSEiTWru3DAkRiF1ZKc777xwiOyDD8ZdSXwUFCLSpPJ9IMD67LsvHH54GFG2WC+TqqAQkSaVSMABB8Buu8VdyY4rL4fXXoMFC+KuJB4KChFpMtXVMGdO4TY7pYweDSUlxdupraAQkSbz+uuwfn1hdmSn2203OPnk0E/xxRdxV5N7CgoRaTKFNBBgfcrL4YMP4Nln464k9xQUItJkkkno3h369Im7ksYbNiwMaliMzU8KChFpMolEaHYqlIEA61JSAmefDY8/HprTiomCQkSaxNq18NZbzaPZKWXsWNi8GR55JO5KcktBISJNInX+RKF3ZKcbNAhKS4uv+UlBISJNIpGAnXeGgQPjriR7UpdJTSbhzTfjriZ3FBQi0iSSSRg8GFq3jruS7Dr33BAYxbRXoaAQkaz79NNwFnNzanZK6dkTTjwRfvc72Lgx7mpyQ0EhIln38suwdWvz6shON2FCuGrf3XfHXUluZBQUZjbMzJaa2QozG1/L63ub2QwzW2Rmz5tZz7TXbjWz16LbmWnTe5vZy2a23MymmVlJNP18M6s0swXR7aJsbKiI5E4yGZpnDj887kqaxtCh4XbrrbBpU9zVNL16g8LMWgJ3AsOBUuBsMyutMdvtwP3u3h+YCNwcvfckYBAwABgMXG1mHaP33Ar83N37AR8BF6Ytb5q7D4hu9+zw1olILBIJ+PrXYZdd4q6k6dxwQzgEePLkuCtpepnsURwKrHD3le6+BXgYGFljnlJgRvR4ZtrrpcAL7l7l7p8BC4FhZmbAscD0aL4pwCk7vhkiki+2boWXXmq+zU4pxx4b9phuuQW2bIm7mqaVSVD0AFanPV8TTUu3EDgtejwK6GBmnaPpw82srZl1AY4B9gQ6A+vdveorlnla1Iw13cz2rK0oM7vYzCrMrKKysjKDzRCRXHj1Vfjkk+bZkZ3ODK6/HlatgqlT466maWUSFLWdfF/z8h1XAUeZ2XzgKOAdoMrdnwWeBmYDDwFzgKp6lvkk0Ctqxvo7YW/jyzO73+XuZe5e1rVr1ww2Q0RyITUQYHMPCgjjP5WVwU9+AlVV9c9fqDIJijWEvYCUnsB2V49197Xufqq7DwSui6ZtiO4nRX0NJxACYjmwDuhkZq1qLtPdP3T3zdH0u4GDd2jLRCQWyST06AF77x13JU0vtVexcmXzvlRqJkExF+gXHaVUApwFPJE+g5l1MbPUsiYA90XTW0ZNUJhZf6A/8Ky7O6Ev4/ToPWOBx6P5dk9b9AhgyY5smIjEozkNBJiJk0+Ggw6CSZNC/0xzVG9QRP0IlwDPEH60H3H3xWY20cxGRLMdDSw1s2VAd2BSNL01MMvMXgfuAsak9UtcA1xhZisIfRb3RtMvNbPFZrYQuBQ4v5HbKCI5smoVrFnT/Duy05nBj34Ey5bBo4/GXU3TMG8GVwsvKyvzioqKuMsQKXoPPQTnnAPz5oUB9IpFdTV84xshNBYtghYFciqzmc1z97L6
5iuQzRGRQpBIQLt20L9/3JXkVosWYa9i8WL485/jrib7FBQikjXJZDi3oFWr+udtbkaPhn33hZtugmbQULMdBYWIZMWGDeEcimI4LLY2LVvCtdeGwRCfeiruarJLQSEiWfHSS6Gtvpg6sms655xwffCJE5vXXoWCQkSyIpkMbfWDB8ddSXxatw4jy1ZUwDPPxF1N9igoRCQrEolwPkGHDnFXEq/ycthrL7jxxuazV6GgEJFG++KLcA2KYm52SikpgWuugdmzYebMuKvJDgWFiDTawoXw+efF25Fd0wUXwB57hL6K5kBBISKNVkwDAWZip53ghz+EF16AWbPirqbxFBQi0mjJZBgEsGfP+uctFt/5DnTrFvoqCp2CQkQaxX3bQICyTdu2cNVV8Nxz4dDhQqagEJFG+de/4L331JFdm+9+Fzp3Lvy9CgWFiDRKMhnutUfxZe3bwxVXwNNPh4ESC5WCQkQaJZGAjh3hwAPjriQ/XXIJdOoUxoAqVAoKEWmUZBKGDAljHcmXdewIl10Gjz0WhiAvRAoKEdlhH30UhtZWs1PdLrssnLFeqHsVCgoR2WGzZ4d7dWTXbddd4Qc/gOnTYUkBXtxZQSEiOyyZDNeeOPTQuCvJf5dfHg6ZnTSp/nnzjYJCRHZYIgEDB4YfQKlbly7wve+Fy8UuXx53NQ2joBCRHbJlC8ydq2anhrjySmjTBn7yk7graRgFhYjskFdegU2b1JHdEN27w8UXw9Sp4UTFQqGgEJEdooEAd8zVV4dDiW++Oe5KMqegEJEdkkxC377wta/FXUlh6dEDLroIfv97WLUq7moyo6AQkQZzD0GhvYkdc8014f622+KtI1MKChFpsOXLobJSHdk7aq+9YOxYuOceWLs27mrqp6AQkQbTQICNN2ECVFXB//5v3JXUT0EhIg2WSISzjfffP+5KClefPjBmDPzud/DBB3FXUzcFhYg0WKp/ooV+QRrl2mth82b46U/jrqRu+mcWkQaprISlS9XslA377gtnngl33gnr1sVdzVdTUIhIg6QGAlRQZMd118Hnn8MvfhF3JV9NQSEiDZJMQkkJHHJI3JU0DwceCKedBnfcAevXx11N7RQUItIgiQQcfDDstFPclTQfP/oRfPwx/OpXcVdSOwWFiGRs06Zw7WedP5FdBx0EI0bAz38eAiPfKChEJGMVFWHUWPVPZN/114empzvvjLuSL1NQiEjGUgMBDhkSbx3NUVkZDB8OP/sZfPZZ3NVsL6OgMLNhZrbUzFaY2fhaXt/bzGaY2SIze97Meqa9dquZvRbdzkyb3tvMXjaz5WY2zcxKoultoucrotd7NX4zRSQbkknYbz/o2jXuSpqn668Ph8n+9rdxV7K9eoPCzFoCdwLDgVLgbDMrrTHb7cD97t4fmAjcHL33JGAQMAAYDFxtZh2j99wK/Nzd+wEfARdG0y8EPnL3fYCfR/OJSMyqqzUQYFM7/HA47rgwrMfGjXFXs00mexSHAivcfaW7bwEeBkbWmKcUmBE9npn2einwgrtXuftnwEJgmJkZcCwwPZpvCnBK9Hhk9Jzo9eOi+UUkRm+8AR99pI7spnbDDfD++3D33XFXsk0mQdEDWJ32fE00Ld1C4LTo8Sigg5l1jqYPN7O2ZtYFOAbYE+gMrHf3qlqW+Z/1Ra9viObfjpldbGYVZlZRWVmZwWaISGNoIMDcGDo03G69NRxllg8yCYra/pr3Gs+vAo4ys/nAUcA7QJW7Pws8DcwGHgLmAFX1LDOT9eHud7l7mbuXdVWDqUiTSyRC30S/fnFX0vxdf30Yfnzy5LgrCTIJijWEvYCUnsB2I6i7+1p3P9XdBwLXRdM2RPeT3H2Au59ACIHlwDqgk5m1qmWZ/1lf9PouwL93YNtEJItS/RNqCG56xx0X+ituuSUcjhy3TIJiLtAvOkqpBDgLeCJ9BjPrYmapZU0A7oumt4yaoDCz/kB/4Fl3d0JfxunRe8YCj0ePn4ieE73+
j2h+EYnJe+/Bm2+q2SlXzMJexapVMHVq3NVkEBRRP8ElwDPAEuARd19sZhPNbEQ029HAUjNbBnQHJkXTWwOzzOx14C5gTFq/xDXAFWa2gtAHcW80/V6gczT9CuBLh+OKSG49+WS4V1DkzrBh4dyKn/wkXOAoTtYc/lgvKyvzioqKuMsQaXbcw6imV10Vhpl4+WVo3TruqorH44/DKafAlClQXp795ZvZPHcvq28+nZktIrXavBkuvBCuuAJGjoQXX1RI5NqIESGgJ02CrVvjq0NBISJf8v77cOyx4aibG26A6dOhffu4qyo+ZmFk2WXL4NFH46tDQSEi25k/P7SNz58PjzwCP/6xLnkap1NPhdJSuOmmcHZ8HPTPLyL/8eij2zqsEwk444x465EQ0tddB4sXw5//HFMN8axWRPJJdTX8z//A6NEwcGAYTnzQoLirkpQzzwzX177ppnCAQa4pKESK3Kefwumnw8SJMG4c/OMf0L173FVJupYt4dprYcECeOqp3K9fQSFSxN56KzQ1Pf54uA7CvfdCmzZxVyW1Oecc6N07BHqu9yoUFCJFatYsOOQQePttePppuPxyDc+Rz1q3hgkTQrPgM8/kdt0KCpEidPfd4fDX3XaDf/4TTjwx7ookE2PHwp57wo035navQkEhUkSqquAHP4CLLw4Dz738cugklcJQUgLjx8Ps2TBzZu7Wq6AQKRL//ncYP+jXvw5nWz/1FHTqFHdV0lAXXAC77x76KnJFQSFSBF5/HQ49NPRLTJ4MP/0ptGpV//sk/+y0E1xzDbzwQvj3zAUFhUgz95e/wGGHhcNgZ86E88+PuyJprO98B7p1C30VuaCgEGmm3OG22+Dkk8NV6ebOhSFD4q5KsqFt2zCi73PPwUsvNf36FBQizdCmTWFY6muuCcNwzJoVjpaR5uO734XOneHhh5t+XWqlFGlm1q6FUaPCYa833hjGCdL5Ec1P+/ZhL7FXr6Zfl4JCpBmZOzdc6GbDBvjTn0JgSPPVu3du1qOmJ5Fm4sEHYejQcAbv7NkKCckeBYVIgauuDkM7nHtuOAR27lzo3z/uqqQ5UdOTSAH7+GMYMwaefDKcbX3HHeHsXZFsUlCIFKg33wzXsn7jjXC29fe+p05raRoKCpEC9I9/hMNe3cNIoscdF3dF0pypj0KkwPzmN/Bf/wVf+1roj1BISFNTUIgUiC1bwklW3/8+DB8Oc+ZA375xVyXFQEEhUgDWrQt7Eb/9bTjb+rHHoGPHuKuSYqE+CpE89+qrMGIEvPsuPPBAOAxWJJcUFCJ5wD3sNbz5JqxYEW6pxwsWwK67brt0qUiuKShEcqS6OozDlAqAmqHw8cfb5jULg/jtsw9ceCFcey3ssUd8tUtxU1CIZFFVFaxa9eW9gjffDLdNm7bN26pVGKunb1844ogQCn37hvvevaFNm/i2QySdgkKkgTZtgn/9q/a9grfeCmGRsvPO2378hw3b9niffcIeg64yJ4VAX1ORr7B8OSxa9OW9gzVrQp9CSseO4Yd/0CAYPXpbGPTtG65t3ELHFkqBU1CI1LBmTRhk74EHtk3r1i388B999PZ7BX37hovHaOgMac4UFCKRzz+H22+HW2+FrVtDWJxxRggDnbMgxUxBIUXPPVxO8pprYPXqEA633ZabK4eJFAK1nkpRmzs3HHF0zjnQpQu88AI88ohCQiRdRkFhZsPMbKmZrTCz8bW8vreZzTCzRWb2vJn1THvtNjNbbGZLzOxXZqE118zOjOZfbGa3pc1/vplVmtmC6HZRNjZUJN0778DYseFCPytXwr33htAYOjTuykTyT71BYWYtgTuB4UApcLaZldaY7XbgfnfvD0wEbo7eOwQ4AugPfB04BDjKzDoD/wsc5+4HAt3NLH0MzGnuPiC63dOoLRRJs3Ej3HQT7LtvaG4aPz4c3XTBBdCyZdzVieSnTPYoDgVWuPtKd98CPAyMrDFPKTAjejwz7XUHdgJKgDZAa+B9
oA+wzN0ro/n+Dpy2oxtRyNauhVtuCVcne+65cPauZJ87TJsG++8P118fRl9dsgRuvhk6dIi7OpH8lklQ9ABWpz1fE01Lt5BtP/SjgA5m1tnd5xCC493o9oy7LwFWAPubWS8zawWcAuyZtrzTomap6WaWPv0/zOxiM6sws4rKysraZslbmzfD9OnwrW+Fk64mTIAHHwyjg/buDTfcEJpDJDvmzQtNSmedFcZMmjkzfP59+sRdmUhhyCQoajtC3Gs8v4rQpDQfOAp4B6gys32AA4CehHA51syGuvtHwHeBacAs4C0gdT7rk0CvqBnr78CU2opy97vcvczdy7p27ZrBZsRvwQK49NIwZs8ZZ4STucaPh2XLwoBwDz8MBxwQmkb69oVjjoH774fPPou78sL07rswblwYSG/ZMrj77hAaRx8dd2UiBcbd67wBhxP2BFLPJwAT6pi/PbAmenw1cH3aazcAP6zlPRcDt9UyvSWwob4aDz74YM9X69a5//KX7gMGuIN7SYn76NHuf/ube1VV7e9Ztcr9ppvc+/YN7+nQwf2ii9yTSffq6tzWX4g2bnSfNMm9Xbvwef/wh+4bNsRdlUj+ASq8nt9XDz9D9QZFK2Al0JvQ17AQOLDGPF2AFtHjScDE6PGZhL2CVoT+iRnAydFr3aL7XYEFwL7R893TljsKeKm+GvMtKL74wv0vf3E//fTwQwXugwa5//rX7h9+mPlyqqvdX3zR/fzzw48euO+3n/stt7i/807T1V+oqqvdH33UvVev8FmNGuW+YkXcVYnkr6wFRVgW3wKWAW8C10XTJgIjosenA8ujee4B2vi2PYLfAUuA14GfpS3zoWja68BZadNvBhZHgTQT2L+++vIlKJYudR8/3n2PPcIn27mz+2WXuS9Y0Phlf/yx+733uh95ZFh2ixbuJ53kPn26++bNjV9+oXvlFfehQ8Nn07+/+4wZcVckkv8yDQrz9NHNClRZWZlXVFTEsu5PPgknaE2eDMlkGABu+PBwuOW3vw0lJdlf57Jl8Pvfw5Qp4aipzp1hzJjQHn/QQdlfXz577z247rrw+XfpEvp3LrxQh7qKZMLM5rl7Wb3zKSgazh1efDH8OD36aBgjaP/9ww/1eeeFEUNzYeuOujNDAAAGDklEQVTWcEjtfffB44/Dli1hBNNx48KZxrvtlps64rBpE/zylzBpUnh86aXhsNdddom7MpHCoaBoAqtXh7/iJ08Oh6926BAOuRw3Dg47LN4RRD/8EB56KITG/PlhT+aUU0JtJ5zQfP7Cdoc//xmuuipcE2LEiDCQX79+cVcmUngUFFmyaRM89lj4Af7738MP1THHhB/gU0+Fdu2aZLWNsnBhCLMHHggB0qNHGK7i/PML+wd1wQK4/HJ4/nk48ED4xS/g+OPjrkqkcCkoGsE9HG9/333hr/T162GvvcIP7fnnh5PiCsHmzfDUUyE0/vrXcNb3N78ZQu6MM6B9+7grzMwHH8CPfgT33BOa0268Eb7zHV0dTqSxFBQ74IMPwl/hkyfDa6/BTjuFvYZx4+DYYwv7SmVr18LUqWHbli4Ne0KjR4dtO/LI/LzwzubNcMcdIRg+/xx+8IPQD7HrrnFXJtI8KCgy9MUX4a/tyZPDX99VVWFE0XHjQv9Dp05ZLjZm7jBnTtjeadPCUVv77BO2t7wcevasfxm5qPGJJ+DKK8PlR086CX76U9hvv7grE2leFBQZeOopuOgieP/9cKnL8vLQtHTggdmvMR999hn88Y8hNJ5/Puwx7btv/HtOmzaFgwVKS+FnP4MTT4y3HpHmKtOgKOpW3j59wtFKF1wQzn1o3TruinKrXbsQjuXl4Yd5yhR4/fW4qwquvDKMqKt+CJH4FfUehYhIMct0j6KAu2dFRCQXFBQiIlInBYWIiNRJQSEiInVSUIiISJ0UFCIiUicFhYiI1ElBISIidWoWJ9yZWSXwdtx1NFIXYF3cReQRfR7b0+exjT6L7TXm89jb
3bvWN1OzCIrmwMwqMjlDsljo89iePo9t9FlsLxefh5qeRESkTgoKERGpk4Iif9wVdwF5Rp/H9vR5bKPPYntN/nmoj0JEROqkPQoREamTgiIGZranmc00syVmttjMLoum72Zmz5nZ8ui+aK4ObWYtzWy+mT0VPe9tZi9Hn8U0MyuJu8ZcMbNOZjbdzN6IviOHF/l34/Lo/8lrZvaQme1ULN8PM7vPzD4ws9fSptX6XbDgV2a2wswWmdmgbNWhoIhHFXClux8AHAZ838xKgfHADHfvB8yInheLy4Alac9vBX4efRYfARfGUlU8fgn8zd33Bw4ifC5F+d0wsx7ApUCZu38daAmcRfF8P34PDKsx7au+C8OBftHtYuD/slWEgiIG7v6uu78SPf6E8EPQAxgJTIlmmwKcEk+FuWVmPYGTgHui5wYcC0yPZimmz6IjMBS4F8Ddt7j7eor0uxFpBexsZq2AtsC7FMn3w91fBP5dY/JXfRdGAvd78BLQycx2z0YdCoqYmVkvYCDwMtDd3d+FECZAt/gqy6lfAD8EqqPnnYH17l4VPV9DCNJi0AeoBCZHTXH3mFk7ivS74e7vALcDqwgBsQGYR/F+P+Crvws9gNVp82Xtc1FQxMjM2gN/BP6fu38cdz1xMLNvAx+4+7z0ybXMWiyH57UCBgH/5+4Dgc8okmam2kTt7yOB3sAeQDtCE0tNxfL9qEuT/b9RUMTEzFoTQuIP7v6naPL7qV3F6P6DuOrLoSOAEWb2FvAwoUnhF4Td5lbRPD2BtfGUl3NrgDXu/nL0fDohOIrxuwFwPPAvd6909y+APwFDKN7vB3z1d2ENsGfafFn7XBQUMYja4O8Flrj7z9JeegIYGz0eCzye69pyzd0nuHtPd+9F6KT8h7ufC8wETo9mK4rPAsDd3wNWm9l+0aTjgNcpwu9GZBVwmJm1jf7fpD6Povx+RL7qu/AEUB4d/XQYsCHVRNVYOuEuBmZ2JDALeJVt7fLXEvopHgH2IvwHOcPda3ZkNVtmdjRwlbt/28z6EPYwdgPmA2PcfXOc9eWKmQ0gdOyXACuBcYQ/6oryu2FmPwbOJBwtOB+4iND23uy/H2b2EHA0YYTY94H/AR6jlu9CFKS/Jhwl9Tkwzt0rslKHgkJEROqipicREamTgkJEROqkoBARkTopKEREpE4KChERqZOCQkRE6qSgEBGROikoRESkTv8fZ0zz4ZeFNbQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1cec484eeb8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the clustering score against the number of clusters. The explicit\n",
    "# Axes interface is used so that the figure carries its own labels.\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(Ks, np.array(CH_scores), 'b-')\n",
    "ax.set_xlabel('number of clusters K')\n",
    "ax.set_ylabel('silhouette score')\n",
    "# The trailing semicolon suppresses the text repr of the last expression.\n",
    "ax.set_title('MiniBatchKMeans clustering score vs. K');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
